library(tidyverse)
library(DescTools)

setwd("C:/Users/stien/Documents/Facebook/replication material")
dat <- read_csv("data/user_info_.csv")

# set up models

dat$ageyrs <- 2016-dat$birthyr
dat$age <- cut(dat$ageyrs, c(18,
                             29,
                             44,
                             65,
                             max(dat$ageyrs, na.rm = TRUE)), include.lowest = TRUE, 
               labels = c("18-29", "30-44", "45-65", "Over 65"))

#dat$party <- factor(dat$pid3, labels = c("Democrat", "Republican", "Independent","Other", "Not sure")
#dat$party <- relevel(dat$party, ref = "Not sure")

# value       label
# 1           Democrat
# 2           Republican
# 3           Independent
# 4           Other
# 5           Not sure

#dat[dat == 6] <- NA
dat$pid7[which(dat$pid7 == 8)] <- NA

dat$party = cut(dat$pid7, c(0,3,4,7), labels = c("Democrat", "Independent", "Republican"))
dat$party <- relevel(dat$party, ref = "Independent")

#dat$party7 <- factor(dat$pid7, labels = c("Very strong Democrat", "Strong democrat","lean democrat","Independent", "Lean republican", "Strong republican","Very strong republican"))
#dat$party7 <- relevel(dat$party, ref = "Independent")

dat$strength <- 0
dat$strength[dat$pid7 == 8] <- NA
dat$strength[dat$pid7 == 1] <- 4
dat$strength[dat$pid7 == 2] <- 3
dat$strength[dat$pid7 == 3] <- 2

dat$strength[dat$pid7 == 7] <- 4
dat$strength[dat$pid7 == 6] <- 3
dat$strength[dat$pid7 == 5] <- 2
dat$strength[dat$pid7 == 4] <- 1


#categorical
#dat$party7 <- factor(dat$pid7, labels = c("Very strong", "Not very strong", "Leaning", "Independent"))
#dat$party7 <- relevel(dat$party7, ref = "Independent")

dat$ideology <- factor(dat$ideo5, labels = c("Very liberal", "Liberal", "Moderate", "Conservative",
                                             "Very conservative", "Not sure"))

dat$ideology[which(dat$ideology == "Not sure")] <- NA
dat$ideology <- relevel(dat$ideology, ref = "Moderate")

# value   label
# 1       Very liberal
# 2       Liberal
# 3       Moderate
# 4       Conservative
# 5       Very conservative
# 6       Not sure


dat$female <- ifelse(dat$gender == 2, 1, 0)
dat$black <- ifelse(dat$race == 2, 1, 0)
dat$race4 <- factor(ifelse(dat$race > 3, 4, dat$race), labels = c("White", "Black", 
                                                                  "Hispanic", "Other"))
dat$faminc[which(dat$faminc == 97)] <- NA

dat$newsint[which(dat$newsint == 7)] <- NA
dat$newsint <- - dat$newsint

# dat$user_count <- (dat$user_count - mean(dat$user_count)) / sd(dat$user_count)

# regs

# beta regs (www.jstatsoft.org/article/view/v034i02/v34i02.pdf)

library(estimatr)
library(stargazer)
library(betareg)
library(rcompanion)
library(DescTools)

# cramer ideo5
mod.1 <- betareg(resp_cramer_v ~ age + race4 + female + faminc + educ + ideology + newsint + log(user_count), data = dat, weights = weight_svypop_w1)

summary(mod.1)
varnames <- c("(Intercept)", "Age: 30-44", "Age: 45-65", "Age: Over 65", "Black", "Hispanic",
              "Other Race", "Female", "Income", "Education",
              "Very Liberal", "Liberal",
              "Conservative", "Very Conservative", "Political news interest", "Number of likes") 

sink(file = "results/beta_reg_cramer.tex")

stargazer(mod.1, 
          style = "apsr", align = TRUE, 
          title = "Determinants of individual homogeneity",
          covariate.labels = c(varnames[-1]),
          dep.var.labels = c("Homogeneity"), 
          notes = "Beta regressions with survey weights applied.")
sink()

# entropy, cramer, variance ideo5 (Table SA5)
mod.1 <- betareg(resp_cramer_v ~ age + race4 + female + faminc + educ + ideology + newsint + log(user_count), data = dat, weights = weight_svypop_w1)
mod.2 <- betareg(resp_entropy ~ age + race4 + female + faminc + educ +ideology + newsint + log(user_count), data = dat, weights = weight_svypop_w1)
mod.3 <- betareg(resp_variance ~ age + race4 + female + faminc + educ + ideology + newsint + log(user_count), data = dat, weights = weight_svypop_w1)

varnames <- c("(Intercept)", "Age: 30-44", "Age: 45-65", "Age: Over 65", "Black", "Hispanic",
              "Other Race", "Female", "Income", "Education",
              "Very Liberal", "Liberal",
              "Conservative", "Very Conservative", "Political news interest", "Number of likes") 

sink(file = "results/beta_reg_all.tex")

stargazer(mod.1, mod.2, mod.3,  
          style = "apsr", align = TRUE, 
          title = "Determinants of individual homogeneity for different metrics of homogeneity",
          covariate.labels = c(varnames[-1]),
          dep.var.labels = c("Cramer's V", "Entropy","Variance"), 
          notes = "Beta regressions with survey weights applied.")
sink()


#per category ideo5 (Table 8)
mod.1 <- betareg(cramer_v_politics ~ age + race4 + female + faminc + educ + ideology + newsint + log(user_count), data = dat, weights = weight_svypop_w1)
mod.2 <- betareg(cramer_v_polnews ~ age + race4 + female + faminc + educ +ideology + newsint + log(user_count), data = dat, weights = weight_svypop_w1)
mod.3 <- betareg(cramer_v_hardnews ~ age + race4 + female + faminc + educ + ideology + newsint + log(user_count), data = dat, weights = weight_svypop_w1)
mod.4 <- betareg(cramer_v_lifestyle ~ age + race4 + female + faminc + educ + ideology + newsint + log(user_count), data = dat, weights = weight_svypop_w1)


varnames <- c("(Intercept)", "Age: 30-44", "Age: 45-65", "Age: Over 65", "Black", "Hispanic",
              "Other Race", "Female", "Income", "Education",
              "Very Liberal", "Liberal",
              "Conservative", "Very Conservative", "Political news interest", "Number of likes")

sink(file = "results/beta_reg_cats.tex")
stargazer(mod.1, mod.2, mod.3, mod.4, 
          style = "apsr", align = TRUE, 
          title = "Determinants of individual homogeneity per category",
          covariate.labels = c(varnames[-1]),
          dep.var.labels = c('Politics', 'Political news','Hardnews', 'Lifestyle'), 
          notes = "Beta regressions with survey weights applied.")
sink()

#per category weighted for individual number of likes (Table SA6)
mod.1 <- betareg(resp_cramer_v ~ age + race4 + female + faminc + educ + ideology +  newsint + log(user_count), data = dat, weights = user_count/300)

varnames <- c("(Intercept)", "Age: 30-44", "Age: 45-65", "Age: Over 65", "Black", "Hispanic",
              "Other Race", "Female", "Income", "Education",
              "Very Liberal", "Liberal",
              "Conservative", "Very Conservative", "Political news interest", "Number of likes")

sink(file = "results/beta_reg_numlikes.tex")
stargazer(mod.1, 
          style = "apsr", align = TRUE, 
          title = "Determinants of individual homogeneity per category weighted for individual number of page likes",
          covariate.labels = c(varnames[-1]),
          dep.var.labels = c('Homogeneity'), 
          notes = "Beta regressions weighted for individual number of page likes.")
sink()


#diagnostics
car::vif(mod.4)
par(mfrow = c(2, 2))
plot(mod.1)

qqnorm(residuals(mod.2, type = "sweighted2"))
qqline(residuals(mod.2, type = "sweighted2"))

summary(mod.2)

# categories with pid7 
mod.1 <- betareg(cramer_v_politics ~ age + race4 + female + faminc + educ +  strength + party + newsint + log(user_count), data = dat, weights = weight_svypop_w1)
mod.2 <- betareg(cramer_v_polnews ~ age + race4 + female + faminc + educ + strength + party + newsint +  log(user_count), data = dat, weights = weight_svypop_w1)
mod.3 <- betareg(cramer_v_hardnews ~ age + race4 + female + faminc + educ + strength + party + newsint + log(user_count), data = dat, weights = weight_svypop_w1)
mod.4 <- betareg(cramer_v_lifestyle ~ age + race4 + female + faminc + educ + strength + party + newsint + log(user_count), data = dat, weights = weight_svypop_w1)

summary(mod.1)

varnames <- c("(Intercept)", "Age: 30-44", "Age: 45-65", "Age: Over 65", "Black", "Hispanic",
              "Other Race", "Female", "Income", "Education",
              "PID strength", "Democrat","Republican", "Political news interest", "Number of likes") 

sink(file = "results/beta_reg_cats_pid7.tex")
stargazer(mod.1, mod.2, mod.3, mod.4, 
          style = "apsr", align = TRUE, 
          title = "Determinants of individual homogeneity with party identification (PID) strength",
          covariate.labels = c(varnames[-1]),
          dep.var.labels = c('Politics', 'Political news','Hardnews', 'Lifestyle'), 
          notes = "Beta regressions with survey weights applied.")
sink()

# categories with pid7 as dummy
mod.1 <- betareg(cramer_v_politics ~ age + race4 + female + faminc + educ +  party + newsint + log(user_count), data = dat, weights = weight_svypop_w1)
mod.2 <- betareg(cramer_v_polnews ~ age + race4 + female + faminc + educ + party + newsint + log(user_count), data = dat, weights = weight_svypop_w1)
mod.3 <- betareg(cramer_v_hardnews ~ age + race4 + female + faminc + educ + party + newsint + log(user_count), data = dat, weights = weight_svypop_w1)
mod.4 <- betareg(cramer_v_lifestyle ~ age + race4 + female + faminc + educ + party + newsint + log(user_count), data = dat, weights = weight_svypop_w1)

summary(mod.1)

varnames <- c("(Intercept)", "Age: 30-44", "Age: 45-65", "Age: Over 65", "Black", "Hispanic",
              "Other Race", "Female", "Income", "Education",
              "Very strong Democrat", "Strong democrat","Lean democrat", 
              "Lean republican", "Strong republican","Very strong republican", "Political news interest", 
              "Number of likes") 

sink(file = "results/beta_reg_cats_pid7dummy.tex")
stargazer(mod.1, mod.2, mod.3, mod.4, 
          style = "apsr", align = TRUE, 
          title = "Determinants of individual homogeneity with party identification (PID) strength",
          covariate.labels = c(varnames[-1]),
          dep.var.labels = c('Politics', 'Political news','Hardnews', 'Lifestyle'), 
          notes = "Beta regressions with survey weights applied.")
sink()

# cramer pid7 strength (Table SA7)
mod.1 <- betareg(resp_cramer_v ~ age + race4 + female + faminc + educ + pid7 + party + newsint + user_count, data = dat, weights = weight_svypop_w1)


varnames <- c("(Intercept)", "Age: 30-44", "Age: 45-65", "Age: Over 65", "Black", "Hispanic",
              "Other Race", "Female", "Income", "Education",
              "PID strength", "Democrat","Republican", "Political news interest", "Number of likes") 

sink(file = "results/beta_reg_cramer_pid7.tex")
stargazer(mod.1, 
          style = "apsr", align = TRUE, 
          title = "Determinants of individual homogeneity with party identification (PID) strength",
          covariate.labels = c(varnames[-1]),
          dep.var.labels = c("Homogeneity"), 
          notes = "Beta regressions with survey weights applied.")
sink()
